wUpperS <- function(w){ w * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating w*
wLowerS <- function(w){ 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wS = integrate(wUpperS,0,1)[[1]]/integrate(wLowerS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperS <- function(w){ (w^2)*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*private
privateLowerS <- function(w){ w*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateS = integrate(privateUpperS,0,1)[[1]]/integrate(privateLowerS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperS <- function(w){ (w^3)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*shared
sharedLowerS <- function(w){ (w^2)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedS = integrate(sharedUpperS,0,1)[[1]]/integrate(sharedLowerS,0,1)[[1]]
#compute tbarhat
tbarS <- fbar + (fbar-gbar)/hprivateS
#compute shat
shatS <- fbar + (gbar-fbar)/(1-hsharedS)
#compute alpha*
alphaS <- (n * wS) / (n * wS + 1 - wS)
NCAAThetaS[i] <- (alphaS * tbarS + (1 - alphaS) * shatS)/100   #compute thetahat_S, convert to probability
NCAAThetaSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR NESTED-SYMMETRIC PIVOTING PROCEDURE
#regress g on f among respondents who gave both f and g and f!=g
gneqf <- lm(g ~ f, data = datagneqf)
NSwhat <- coef(summary(gneqf))["f","Estimate"]
NSse <- coef(summary(gneqf))["f","Std. Error"]
#Note to get the t density w/ mean mu & s.d. sigma we need to use 1/sigma*dt((x - mu)/sigma, df)
#also note df = n-2
#function inside numerator for calculating (wp)*
wpUpperNS <- function(w){ w * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating w*
wpLowerNS <- function(w){ 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wpNS = integrate(wpUpperNS,0,1)[[1]]/integrate(wpLowerNS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperNS <- function(w){ (w^2)*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*private
privateLowerNS <- function(w){ w*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateNS = integrate(privateUpperNS,0,1)[[1]]/integrate(privateLowerNS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperNS <- function(w){ (w^3)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*shared
sharedLowerNS <- function(w){ (w^2)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedNS = integrate(sharedUpperNS,0,1)[[1]]/integrate(sharedLowerNS,0,1)[[1]]
#compute tbarhatNS
tbarNS <- fbar + (fbar-gbar)/hprivateNS
#compute shatNS
shatNS <- fbar + (gbar-fbar)/(1-hsharedNS)
#compute alphaNS
alphaNS <- (n * wpNS) / (n * wpNS + 1 - wpNS)
NCAAThetaNS[i] <- (alphaNS * tbarNS + (1 - alphaNS) * shatNS)/100 #compute thetahat_NS, convert to probability
NCAAThetaNSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaNS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
}
t.test((NCAAOutcome-NCAAFBar)^2,NCAAIndivBrierF,mu=0,paired=TRUE)
mean(NCAAIndivBrierF)
mean((NCAAOutcome-NCAAFBar)^2)
#paired t-test Brier Score based on game results, fbar versus thetaS
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaS)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaS)^2)
sd((NCAAOutcome-NCAAThetaS)^2)
#paired t-test Brier Score based on game results, fbar versus thetaN
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaN)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaN)^2)
sd((NCAAOutcome-NCAAThetaN)^2)
#paired t-test Brier Score based on game results, fbar versus thetaNS
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaNS)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaNS)^2)
sd((NCAAOutcome-NCAAThetaNS)^2)
#paired t-test Brier Score based on game results, fbar versus thetaM
mean((NCAAOutcome-NCAAThetaM)^2)
sd((NCAAOutcome-NCAAThetaM)^2)
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaM)^2,mu=0,paired=TRUE)
#Market probabilities Brier Score based on game results
mean((NCAAOutcome-NCAAMarketTheta)^2)
sd((NCAAOutcome-NCAAMarketTheta)^2)
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAMarketTheta)^2,mu=0,paired=TRUE) #fbar versus NCAAMarketTheta
#f_i versus fbar closer to market probabilities
mean(NCAAIndivAbsErrorF)
t.test(NCAAIndivAbsErrorF,abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar distance to market probabilities
mean(abs(NCAAMarketTheta-NCAAFBar))
sd(abs(NCAAMarketTheta-NCAAFBar))
#fbar versus thetaS closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaS))
sd(abs(NCAAMarketTheta-NCAAThetaS))
t.test(abs(NCAAMarketTheta-NCAAThetaS),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaN closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaN))
sd(abs(NCAAMarketTheta-NCAAThetaN))
t.test(abs(NCAAMarketTheta-NCAAThetaN),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaNS closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaNS))
sd(abs(NCAAMarketTheta-NCAAThetaNS))
t.test(abs(NCAAMarketTheta-NCAAThetaNS),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaM closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaM))
sd(abs(NCAAMarketTheta-NCAAThetaM))
t.test(abs(NCAAMarketTheta-NCAAThetaM),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
# Brier Score Decomposition
# Uncertainty component obar*(1-obar)
obar <- mean(NCAAOutcome)
obar*(1-obar)
# Use function below to map the crowd forecasts to one of 11 categories: <5% -> 0%, 5%-14.99% -> 10%, 15%-24.99% -> 20%, ..., >=95% -> 100%,
ProbCategory <- function(p){floor(10*(pmin(pmax(p+0.05,0),1)))/10}
NCAAFBarCat <- ProbCategory(NCAAFBar)
NCAAThetaMCat <- ProbCategory(NCAAThetaM)
NCAAMarketCat <- ProbCategory(NCAAMarketTheta)
NCAAForecasts <- data.frame(NCAAFBarCat,NCAAThetaMCat,NCAAMarketCat,NCAAOutcome)
NCAAFBarCatHit <- c()
NCAAFBarCatWeight <- c()
NCAAThetaMCatHit <- c()
NCAAThetaMCatWeight <- c()
NCAAMarketCatHit <- c()
NCAAMarketCatWeight <- c()
NCAACatForecast <- c()
for (i in 1:11){
NCAAFBarCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAFBarCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAFBarCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAFBarCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAAThetaMCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAThetaMCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAThetaMCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAThetaMCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAAMarketCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAMarketCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAMarketCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAMarketCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAACatForecast[i] <- .1*(i-1)
}
# Reliability component
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE) #FBar
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE) #ThetaM
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE) #Market
# Resolution component
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE) #FBar
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) #ThetaM
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) #Market
#double check: Brier = Reliability - Resolution + Uncertainty
#Brier Skill Score (BSS) = (Resolution - Reliability)/Uncertainty
#FBar
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAFBarCat)^2)
(sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE)-sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
#ThetaM
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAThetaMCat)^2)
(sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) - sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
#Market
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAMarketCat)^2)
(sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) - sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
### Build calibration curves for each method from 50% to 100%
### start by setting the first entry (50% forecast) to the 6th entry from the categories above
NCAAFBarCatHitALT <- NCAAFBarCatHit[6]
NCAAThetaMCatHitALT <- NCAAThetaMCatHit[6]
NCAAMarketCatHitALT <- NCAAMarketCatHit[6]
NCAACatForecastALT <- NCAACatForecast[6]
#for the 5 other categories (60%, 70%, ..., 100%), take weighted average (if any) of the hit rate in entry 6+1 and 1 minus hit rate in entry 6-i from the categories above
for (i in 1:5) {
NCAAFBarCatHitALT[i+1] = sum(NCAAFBarCatWeight[6+i]*NCAAFBarCatHit[6+i],NCAAFBarCatWeight[6-i]*(1-NCAAFBarCatHit[6-i]),na.rm=TRUE)/sum(NCAAFBarCatWeight[6+i],NCAAFBarCatWeight[6-i],na.rm=TRUE)
NCAAThetaMCatHitALT[i+1] = sum(NCAAThetaMCatWeight[6+i]*NCAAThetaMCatHit[6+i],NCAAThetaMCatWeight[6-i]*(1-NCAAThetaMCatHit[6-i]),na.rm=TRUE)/sum(NCAAThetaMCatWeight[6+i],NCAAThetaMCatWeight[6-i],na.rm=TRUE)
NCAAMarketCatHitALT[i+1] = sum(NCAAMarketCatWeight[6+i]*NCAAMarketCatHit[6+i],NCAAMarketCatWeight[6-i]*(1-NCAAMarketCatHit[6-i]),na.rm=TRUE)/sum(NCAAMarketCatWeight[6+i],NCAAMarketCatWeight[6-i],na.rm=TRUE)
NCAACatForecastALT[i+1] = 0.5 + 0.1*i
}
CrowdMethod <- c("FBar","Market","ThetaM")
NCAACalibrationALT <- data.frame(rep(CrowdMethod, each=6),append(append(NCAAFBarCatHitALT,NCAAThetaMCatHitALT,after=6),NCAAMarketCatHitALT,after=6),append(append(NCAACatForecastALT,NCAACatForecastALT,after=6),NCAACatForecastALT,after=6))
colnames(NCAACalibrationALT) <- c("CrowdMethod","HitRate", "ForecastCategory")
ggplot(data=NCAACalibrationALT,aes(x=ForecastCategory, y=HitRate,colour=CrowdMethod,shape=CrowdMethod))+geom_line()+ geom_point(size=5, fill="white")+geom_abline(slope=1,lty=2)+theme_bw(base_size=20)+scale_colour_brewer(palette="Dark2",name="Forecasting\nMethod", labels = c("Simple\nAverage", "\nMarket\n", "Minimal\nPivoting"))+scale_shape_manual(values=c(1,2,3),name="Forecasting\nMethod", labels = c("Simple\nAverage", "\nMarket\n", "Minimal\nPivoting"))+xlim(c(0.46,1))+ylim(c(0.46,1))+xlab("Forecast Category")+ylab("Hit Rate")+scale_x_continuous(breaks = c(0.5,0.6,0.7,0.8,0.9,1))+scale_y_continuous(breaks = c(0.5,0.6,0.7,0.8,0.9,1))+ theme(aspect.ratio=1)
####     STUDY 4 CALCULATIONS     #####
#######################################
NCAAData <- read.csv("Study4.csv")
# generate NCAAThetaN,NCAAThetaS,NCAAThetaNS,NCAAFBar,NCAAThetaM, etc to be filled in later in the loops below
NCAAFBar = c()
NCAAGBar = c()
NCAAIndivAbsErrorF = c()
NCAAIndivBrierF = c()
NCAAFBarImprove = c()
NCAAThetaN = c()
NCAAThetaS = c()
NCAAThetaNS = c()
NCAAThetaM = c()
NCAAThetaNImprove = c()
NCAAThetaSImprove = c()
NCAAThetaNSImprove = c()
NCAAThetaMImprove = c()
#Realizations of the game outcome; Indicator variable for whether Team 1 won the game (or not).
NCAAOutcome = c(1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0)
#market probabilities
NCAAMarketTheta = c(0.981,0.279,0.727,0.795,0.717,0.901,0.598,0.917,0.970,0.588,0.592,0.924,0.631,0.784,0.686,0.951,0.970,0.408,0.596,0.743,0.623,0.907,0.665,0.915,0.933,0.707,0.587,0.951,0.366,0.909,0.545,0.929,0.654,0.394,0.458,0.538,0.765,0.388,0.357,0.431,0.982,0.601,0.731,0.769,0.633,0.925,0.568,0.958,0.978,0.566,0.712,0.796,0.606,0.900,0.687,0.945,0.967,0.532,0.758,0.835,0.596,0.804,0.369,0.974,0.998,0.487,0.646,0.655,0.466,0.880,0.694,0.850,0.918,0.448,0.715,0.142,0.426,0.450,0.670,0.210,0.997,0.39,0.823,0.629,0.534,0.907,0.757,0.972,0.989,0.458,0.879,0.918,0.641,0.747,0.558,0.906,0.986,0.468,0.68,0.847,0.644,0.897,0.349,0.917,0.995,0.357,0.806,0.762,0.476,0.788,0.518,0.973,0.731,0.361,0.589,0.432,0.682,0.517,0.674,0.649)
for (i in 1:120) {
#i=1:80 iterate for game number
#subset the f and g reduced to the set of respondents gave both f and g for game i
datagf <- subset(NCAAData, game == i & f!='NA' & g!='NA')
#the f and g reduced to the set of respondents who gave both f and g and f!=g
datagneqf <- subset(datagf, g!=f)
#compute means of f and g on datagf
datagf$AbsErrorf = abs(datagf$f/100-NCAAMarketTheta[i]) #create variable of absolute individual forecast errors
datagf$Brierf = (datagf$f/100-NCAAOutcome[i])^2 #create variable of individual forecast Brier scores
fbar <- colMeans(datagf[c("f")])
gbar <- colMeans(datagf[c("g")])
NCAAFBar[i] <- fbar/100 #store fbar estimate (simple average), convert to probability
NCAAGBar[i] <- gbar/100 #store fbar estimate (simple average), convert to probability
NCAAThetaM[i] <- (2*fbar-gbar)/100 #store thetaM estimate (minimal pivoting), convert to probability
NCAAIndivBrierF[i] <- mean(datagf$Brierf)
NCAAIndivAbsErrorF[i] <- mean(datagf$AbsErrorf)
NCAAFBarImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAFBar[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
NCAAThetaMImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaM[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR NESTED PIVOTING PROCEDURE
#note "unlist" takes e.g. datagf, which is a data.frame of length 1 and simplifies it to produce a vector which contains all its atomic components
n <- length(unlist(datagf[c("f")]))
d <- length(unlist(datagneqf[c("f")]))
phat <- d/n
#store thetaN estimate, convert to probability
if (phat>0) NCAAThetaN[i] <- (fbar + (fbar-gbar)/phat)/100 else NCAAThetaN[i] <- fbar/100
NCAAThetaNImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaN[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR SYMMETRIC PIVOTING PROCEDURE
#regress g on f (among respondents who gave both f and g)
#save w_hat and its std. error
gonf <- lm(g ~ f, data = datagf)
Swhat <- coef(summary(gonf))["f","Estimate"]
Sse <- coef(summary(gonf))["f","Std. Error"]
# summary(gonf) # show results
#Note to get the t density w/ mean mu and s.d. sigma we need to use 1/sigma*dt((x - mu)/sigma, df)
#also note df = n-2
#function inside numerator for calculating w*
wUpperS <- function(w){ w * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating w*
wLowerS <- function(w){ 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wS = integrate(wUpperS,0,1)[[1]]/integrate(wLowerS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperS <- function(w){ (w^2)*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*private
privateLowerS <- function(w){ w*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateS = integrate(privateUpperS,0,1)[[1]]/integrate(privateLowerS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperS <- function(w){ (w^3)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*shared
sharedLowerS <- function(w){ (w^2)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedS = integrate(sharedUpperS,0,1)[[1]]/integrate(sharedLowerS,0,1)[[1]]
#compute tbarhat
tbarS <- fbar + (fbar-gbar)/hprivateS
#compute shat
shatS <- fbar + (gbar-fbar)/(1-hsharedS)
#compute alpha*
alphaS <- (n * wS) / (n * wS + 1 - wS)
NCAAThetaS[i] <- (alphaS * tbarS + (1 - alphaS) * shatS)/100   #compute thetahat_S, convert to probability
NCAAThetaSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR NESTED-SYMMETRIC PIVOTING PROCEDURE
#regress g on f among respondents who gave both f and g and f!=g
gneqf <- lm(g ~ f, data = datagneqf)
NSwhat <- coef(summary(gneqf))["f","Estimate"]
NSse <- coef(summary(gneqf))["f","Std. Error"]
#Note to get the t density w/ mean mu & s.d. sigma we need to use 1/sigma*dt((x - mu)/sigma, df)
#also note df = n-2
#function inside numerator for calculating (wp)*
wpUpperNS <- function(w){ w * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating w*
wpLowerNS <- function(w){ 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wpNS = integrate(wpUpperNS,0,1)[[1]]/integrate(wpLowerNS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperNS <- function(w){ (w^2)*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*private
privateLowerNS <- function(w){ w*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateNS = integrate(privateUpperNS,0,1)[[1]]/integrate(privateLowerNS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperNS <- function(w){ (w^3)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*shared
sharedLowerNS <- function(w){ (w^2)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedNS = integrate(sharedUpperNS,0,1)[[1]]/integrate(sharedLowerNS,0,1)[[1]]
#compute tbarhatNS
tbarNS <- fbar + (fbar-gbar)/hprivateNS
#compute shatNS
shatNS <- fbar + (gbar-fbar)/(1-hsharedNS)
#compute alphaNS
alphaNS <- (n * wpNS) / (n * wpNS + 1 - wpNS)
NCAAThetaNS[i] <- (alphaNS * tbarNS + (1 - alphaNS) * shatNS)/100 #compute thetahat_NS, convert to probability
NCAAThetaNSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaNS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
}
#######################################
####     STUDY 4 CALCULATIONS     #####
#######################################
NCAAData <- read.csv("Study4.csv")
# generate NCAAThetaN,NCAAThetaS,NCAAThetaNS,NCAAFBar,NCAAThetaM, etc to be filled in later in the loops below
NCAAFBar = c()
NCAAGBar = c()
NCAAIndivAbsErrorF = c()
NCAAIndivBrierF = c()
NCAAFBarImprove = c()
NCAAThetaN = c()
NCAAThetaS = c()
NCAAThetaNS = c()
NCAAThetaM = c()
NCAAThetaNImprove = c()
NCAAThetaSImprove = c()
NCAAThetaNSImprove = c()
NCAAThetaMImprove = c()
#Realizations of the game outcome; Indicator variable for whether Team 1 won the game (or not).
NCAAOutcome = c(1,0,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,0,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,0,1,0,1,1,0,0,1,0,1,0,0,1,0,1,0,1,1,1,0)
#market probabilities
NCAAMarketTheta = c(0.981,0.279,0.727,0.795,0.717,0.901,0.598,0.917,0.970,0.588,0.592,0.924,0.631,0.784,0.686,0.951,0.970,0.408,0.596,0.743,0.623,0.907,0.665,0.915,0.933,0.707,0.587,0.951,0.366,0.909,0.545,0.929,0.654,0.394,0.458,0.538,0.765,0.388,0.357,0.431,0.982,0.601,0.731,0.769,0.633,0.925,0.568,0.958,0.978,0.566,0.712,0.796,0.606,0.900,0.687,0.945,0.967,0.532,0.758,0.835,0.596,0.804,0.369,0.974,0.998,0.487,0.646,0.655,0.466,0.880,0.694,0.850,0.918,0.448,0.715,0.142,0.426,0.450,0.670,0.210,0.997,0.39,0.823,0.629,0.534,0.907,0.757,0.972,0.989,0.458,0.879,0.918,0.641,0.747,0.558,0.906,0.986,0.468,0.68,0.847,0.644,0.897,0.349,0.917,0.995,0.357,0.806,0.762,0.476,0.788,0.518,0.973,0.731,0.361,0.589,0.432,0.682,0.517,0.674,0.649)
for (i in 1:120) {
#i=1:80 iterate for game number
#subset the f and g reduced to the set of respondents gave both f and g for game i
datagf <- subset(NCAAData, gamenumber == i & f!='NA' & g!='NA')
#the f and g reduced to the set of respondents who gave both f and g and f!=g
datagneqf <- subset(datagf, g!=f)
#compute means of f and g on datagf
datagf$AbsErrorf = abs(datagf$f/100-NCAAMarketTheta[i]) #create variable of absolute individual forecast errors
datagf$Brierf = (datagf$f/100-NCAAOutcome[i])^2 #create variable of individual forecast Brier scores
fbar <- colMeans(datagf[c("f")])
gbar <- colMeans(datagf[c("g")])
NCAAFBar[i] <- fbar/100 #store fbar estimate (simple average), convert to probability
NCAAGBar[i] <- gbar/100 #store fbar estimate (simple average), convert to probability
NCAAThetaM[i] <- (2*fbar-gbar)/100 #store thetaM estimate (minimal pivoting), convert to probability
NCAAIndivBrierF[i] <- mean(datagf$Brierf)
NCAAIndivAbsErrorF[i] <- mean(datagf$AbsErrorf)
NCAAFBarImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAFBar[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
NCAAThetaMImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaM[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR NESTED PIVOTING PROCEDURE
#note "unlist" takes e.g. datagf, which is a data.frame of length 1 and simplifies it to produce a vector which contains all its atomic components
n <- length(unlist(datagf[c("f")]))
d <- length(unlist(datagneqf[c("f")]))
phat <- d/n
#store thetaN estimate, convert to probability
if (phat>0) NCAAThetaN[i] <- (fbar + (fbar-gbar)/phat)/100 else NCAAThetaN[i] <- fbar/100
NCAAThetaNImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaN[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR SYMMETRIC PIVOTING PROCEDURE
#regress g on f (among respondents who gave both f and g)
#save w_hat and its std. error
gonf <- lm(g ~ f, data = datagf)
Swhat <- coef(summary(gonf))["f","Estimate"]
Sse <- coef(summary(gonf))["f","Std. Error"]
# summary(gonf) # show results
#Note to get the t density w/ mean mu and s.d. sigma we need to use 1/sigma*dt((x - mu)/sigma, df)
#also note df = n-2
#function inside numerator for calculating w*
wUpperS <- function(w){ w * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating w*
wLowerS <- function(w){ 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wS = integrate(wUpperS,0,1)[[1]]/integrate(wLowerS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperS <- function(w){ (w^2)*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*private
privateLowerS <- function(w){ w*((1-w)^2) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateS = integrate(privateUpperS,0,1)[[1]]/integrate(privateLowerS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperS <- function(w){ (w^3)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#function inside denominator for calculating h*shared
sharedLowerS <- function(w){ (w^2)*(1-w) * 1/Sse*dt((w - Swhat)/Sse, n-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedS = integrate(sharedUpperS,0,1)[[1]]/integrate(sharedLowerS,0,1)[[1]]
#compute tbarhat
tbarS <- fbar + (fbar-gbar)/hprivateS
#compute shat
shatS <- fbar + (gbar-fbar)/(1-hsharedS)
#compute alpha*
alphaS <- (n * wS) / (n * wS + 1 - wS)
NCAAThetaS[i] <- (alphaS * tbarS + (1 - alphaS) * shatS)/100   #compute thetahat_S, convert to probability
NCAAThetaSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
###CALCULATIONS FOR NESTED-SYMMETRIC PIVOTING PROCEDURE
#regress g on f among respondents who gave both f and g and f!=g
gneqf <- lm(g ~ f, data = datagneqf)
NSwhat <- coef(summary(gneqf))["f","Estimate"]
NSse <- coef(summary(gneqf))["f","Std. Error"]
#Note to get the t density w/ mean mu & s.d. sigma we need to use 1/sigma*dt((x - mu)/sigma, df)
#also note df = n-2
#function inside numerator for calculating (wp)*
wpUpperNS <- function(w){ w * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating w*
wpLowerNS <- function(w){ 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
wpNS = integrate(wpUpperNS,0,1)[[1]]/integrate(wpLowerNS,0,1)[[1]]
#function inside numerator for calculating h*private
privateUpperNS <- function(w){ (w^2)*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*private
privateLowerNS <- function(w){ w*((1-w)^2) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hprivateNS = integrate(privateUpperNS,0,1)[[1]]/integrate(privateLowerNS,0,1)[[1]]
#function inside numerator for calculating h*shared
sharedUpperNS <- function(w){ (w^3)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#function inside denominator for calculating h*shared
sharedLowerNS <- function(w){ (w^2)*(1-w) * 1/NSse*dt((w - NSwhat)/NSse, d-2) }
#double brackets needed since integrate gives a list, we only want the 1st element
hsharedNS = integrate(sharedUpperNS,0,1)[[1]]/integrate(sharedLowerNS,0,1)[[1]]
#compute tbarhatNS
tbarNS <- fbar + (fbar-gbar)/hprivateNS
#compute shatNS
shatNS <- fbar + (gbar-fbar)/(1-hsharedNS)
#compute alphaNS
alphaNS <- (n * wpNS) / (n * wpNS + 1 - wpNS)
NCAAThetaNS[i] <- (alphaNS * tbarNS + (1 - alphaNS) * shatNS)/100 #compute thetahat_NS, convert to probability
NCAAThetaNSImprove[i] <- (NCAAIndivAbsErrorF[i] - abs(NCAAMarketTheta[i]-NCAAThetaNS[i]))/NCAAIndivAbsErrorF[i] #percentage improvement over avg. forecast
}
t.test((NCAAOutcome-NCAAFBar)^2,NCAAIndivBrierF,mu=0,paired=TRUE)
mean(NCAAIndivBrierF)
mean((NCAAOutcome-NCAAFBar)^2)
#paired t-test Brier Score based on game results, fbar versus thetaS
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaS)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaS)^2)
sd((NCAAOutcome-NCAAThetaS)^2)
#paired t-test Brier Score based on game results, fbar versus thetaN
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaN)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaN)^2)
sd((NCAAOutcome-NCAAThetaN)^2)
#paired t-test Brier Score based on game results, fbar versus thetaNS
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaNS)^2,mu=0,paired=TRUE)
mean((NCAAOutcome-NCAAThetaNS)^2)
sd((NCAAOutcome-NCAAThetaNS)^2)
#paired t-test Brier Score based on game results, fbar versus thetaM
mean((NCAAOutcome-NCAAThetaM)^2)
sd((NCAAOutcome-NCAAThetaM)^2)
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAThetaM)^2,mu=0,paired=TRUE)
#Market probabilities Brier Score based on game results
mean((NCAAOutcome-NCAAMarketTheta)^2)
sd((NCAAOutcome-NCAAMarketTheta)^2)
t.test((NCAAOutcome-NCAAFBar)^2,(NCAAOutcome-NCAAMarketTheta)^2,mu=0,paired=TRUE) #fbar versus NCAAMarketTheta
#f_i versus fbar closer to market probabilities
mean(NCAAIndivAbsErrorF)
t.test(NCAAIndivAbsErrorF,abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar distance to market probabilities
mean(abs(NCAAMarketTheta-NCAAFBar))
sd(abs(NCAAMarketTheta-NCAAFBar))
#fbar versus thetaS closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaS))
sd(abs(NCAAMarketTheta-NCAAThetaS))
t.test(abs(NCAAMarketTheta-NCAAThetaS),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaN closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaN))
sd(abs(NCAAMarketTheta-NCAAThetaN))
t.test(abs(NCAAMarketTheta-NCAAThetaN),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaNS closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaNS))
sd(abs(NCAAMarketTheta-NCAAThetaNS))
t.test(abs(NCAAMarketTheta-NCAAThetaNS),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
#fbar versus thetaM closer to market probabilities
mean(abs(NCAAMarketTheta-NCAAThetaM))
sd(abs(NCAAMarketTheta-NCAAThetaM))
t.test(abs(NCAAMarketTheta-NCAAThetaM),abs(NCAAMarketTheta-NCAAFBar),mu=0,paired=TRUE)
# Brier Score Decomposition
# Uncertainty component obar*(1-obar)
obar <- mean(NCAAOutcome)
obar*(1-obar)
# Use function below to map the crowd forecasts to one of 11 categories: <5% -> 0%, 5%-14.99% -> 10%, 15%-24.99% -> 20%, ..., >=95% -> 100%,
ProbCategory <- function(p){floor(10*(pmin(pmax(p+0.05,0),1)))/10}
NCAAFBarCat <- ProbCategory(NCAAFBar)
NCAAThetaMCat <- ProbCategory(NCAAThetaM)
NCAAMarketCat <- ProbCategory(NCAAMarketTheta)
NCAAForecasts <- data.frame(NCAAFBarCat,NCAAThetaMCat,NCAAMarketCat,NCAAOutcome)
NCAAFBarCatHit <- c()
NCAAFBarCatWeight <- c()
NCAAThetaMCatHit <- c()
NCAAThetaMCatWeight <- c()
NCAAMarketCatHit <- c()
NCAAMarketCatWeight <- c()
NCAACatForecast <- c()
for (i in 1:11){
NCAAFBarCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAFBarCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAFBarCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAFBarCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAAThetaMCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAThetaMCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAThetaMCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAThetaMCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAAMarketCatHit[i] <- mean(NCAAForecasts[abs(NCAAForecasts$NCAAMarketCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)
NCAAMarketCatWeight[i] <- length(NCAAForecasts[abs(NCAAForecasts$NCAAMarketCat - .1*(i-1)) < 0.01, ]$NCAAOutcome)/length(NCAAOutcome)
NCAACatForecast[i] <- .1*(i-1)
}
# Reliability component
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE) #FBar
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE) #ThetaM
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE) #Market
# Resolution component
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE) #FBar
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) #ThetaM
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) #Market
#double check: Brier = Reliability - Resolution + Uncertainty
#Brier Skill Score (BSS) = (Resolution - Reliability)/Uncertainty
#FBar
sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAFBarCat)^2)
(sum(NCAAFBarCatWeight*(NCAAFBarCatHit - obar)^2, na.rm=TRUE)-sum(NCAAFBarCatWeight*(NCAAFBarCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
#ThetaM
sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAThetaMCat)^2)
(sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - obar)^2, na.rm=TRUE) - sum(NCAAThetaMCatWeight*(NCAAThetaMCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
#Market
sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE) - sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) + obar*(1-obar)
mean((NCAAOutcome-NCAAMarketCat)^2)
(sum(NCAAMarketCatWeight*(NCAAMarketCatHit - obar)^2, na.rm=TRUE) - sum(NCAAMarketCatWeight*(NCAAMarketCatHit - NCAACatForecast)^2, na.rm=TRUE))/(obar*(1-obar)) #BSS
### Build calibration curves for each method from 50% to 100%
### start by setting the first entry (50% forecast) to the 6th entry from the categories above
NCAAFBarCatHitALT <- NCAAFBarCatHit[6]
NCAAThetaMCatHitALT <- NCAAThetaMCatHit[6]
NCAAMarketCatHitALT <- NCAAMarketCatHit[6]
NCAACatForecastALT <- NCAACatForecast[6]
#for the 5 other categories (60%, 70%, ..., 100%), take weighted average (if any) of the hit rate in entry 6+1 and 1 minus hit rate in entry 6-i from the categories above
for (i in 1:5) {
NCAAFBarCatHitALT[i+1] = sum(NCAAFBarCatWeight[6+i]*NCAAFBarCatHit[6+i],NCAAFBarCatWeight[6-i]*(1-NCAAFBarCatHit[6-i]),na.rm=TRUE)/sum(NCAAFBarCatWeight[6+i],NCAAFBarCatWeight[6-i],na.rm=TRUE)
NCAAThetaMCatHitALT[i+1] = sum(NCAAThetaMCatWeight[6+i]*NCAAThetaMCatHit[6+i],NCAAThetaMCatWeight[6-i]*(1-NCAAThetaMCatHit[6-i]),na.rm=TRUE)/sum(NCAAThetaMCatWeight[6+i],NCAAThetaMCatWeight[6-i],na.rm=TRUE)
NCAAMarketCatHitALT[i+1] = sum(NCAAMarketCatWeight[6+i]*NCAAMarketCatHit[6+i],NCAAMarketCatWeight[6-i]*(1-NCAAMarketCatHit[6-i]),na.rm=TRUE)/sum(NCAAMarketCatWeight[6+i],NCAAMarketCatWeight[6-i],na.rm=TRUE)
NCAACatForecastALT[i+1] = 0.5 + 0.1*i
}
CrowdMethod <- c("FBar","Market","ThetaM")
NCAACalibrationALT <- data.frame(rep(CrowdMethod, each=6),append(append(NCAAFBarCatHitALT,NCAAThetaMCatHitALT,after=6),NCAAMarketCatHitALT,after=6),append(append(NCAACatForecastALT,NCAACatForecastALT,after=6),NCAACatForecastALT,after=6))
colnames(NCAACalibrationALT) <- c("CrowdMethod","HitRate", "ForecastCategory")
ggplot(data=NCAACalibrationALT,aes(x=ForecastCategory, y=HitRate,colour=CrowdMethod,shape=CrowdMethod))+geom_line()+ geom_point(size=5, fill="white")+geom_abline(slope=1,lty=2)+theme_bw(base_size=20)+scale_colour_brewer(palette="Dark2",name="Forecasting\nMethod", labels = c("Simple\nAverage", "\nMarket\n", "Minimal\nPivoting"))+scale_shape_manual(values=c(1,2,3),name="Forecasting\nMethod", labels = c("Simple\nAverage", "\nMarket\n", "Minimal\nPivoting"))+xlim(c(0.46,1))+ylim(c(0.46,1))+xlab("Forecast Category")+ylab("Hit Rate")+scale_x_continuous(breaks = c(0.5,0.6,0.7,0.8,0.9,1))+scale_y_continuous(breaks = c(0.5,0.6,0.7,0.8,0.9,1))+ theme(aspect.ratio=1)
